Background: This is the analysis script for Study 1 of the 2025 Cogsci Paper: “Adults hold two parallel causal frameworks for reasoning about people’s minds, actions and bodies” by Joseph Outa and Shari Liu.

Study 1 presented two tasks in a random order in a within-subjects design: a Freesorting Task and a Causal Task. Subjects sorted 15 items in a sorting canvas, and rated causal relatedness between all 210 pairs of the 15 items (excluding self pairs).

The N was 50 adults, recruited online via Cloud Research Connect. Study demo: https://liulaboratory-experiments.github.io/mbc/study1

The two research questions are: (1) How do subjects sort items? Do they sort based on three latent causes, 2 latent causes, 6 latent causes, or feature-based similarity. (2) Do subjects’ direct cause representations (from the Causal Task) reflect latent cause boundaries or do they go beyond, and if so, what is their structure?

The manuscript and project materials can be found at: https://github.com/liulaboratory-experiments/mind-body-action-causation

Script description: This script:-
- proceeds from the output of the preprocessing script.
- takes as input data_processed_step1.csv, and data_model_prediction.csv - plots Sorting and Causal Task rdms, gets correlations between empirical and models RDM, gets within subject correlations.
- outputs data_freesort_mins_causals.csv for use in study 2 script for item selection.

1 Setup

1.1 load packages

1.2 Set variables

# variables
desired_order <- c("see something", "hear something", "choose what to do", 
                   "remember something", "think about something", "reach for something", 
                   "sit down", "jump up and down", "kick something", "take a walk", 
                   "get tired", "become hungry", "feel scared", "experience pain", "get sick")
domain_order <- c("mind", "action", "bio")
freesort_columns <- c("subject_id", "x", "y", "item")
freesort_columns_with_trial_order <- c("subject_id", "x", "y", "item", "trial_type", "trial_type_order")
causation_columns <- c("subject_id", "itemA", "itemB", "response")

# paths
## script inputs
base_path <- here("code", "study1")
snapshots_input_path <- paste0(base_path, "/snapshots_study1_preprocessing_analysis") 
data_input_path <- paste0(snapshots_input_path, "/data_processed_step1.csv")
model_predictions_input_path <- paste0(snapshots_input_path, "/data_model_predictions.csv") 
rdms_input_path <- paste0(base_path, "/figures/theoretical-rdms")

## script outputs
snapshots_output_path <- paste0(base_path, "/snapshots_study1_final_analysis")
rdms_outputs_path <- paste0(base_path, "/figures")
freesort_minus_causal_distances_output_path <- paste0(snapshots_output_path, "/freesort_minus_causals.csv")

## saved dataframes (not to be used in subsequent scripts, but saving for efficiency)
data_processed_output_path <- paste0(snapshots_output_path, "/data_processed_step2.csv") #not used in a subsequent script
correlations_output_path <- paste0(snapshots_output_path, "/correlations.csv") #not used in this script

1.3 Read data

d <- read_csv(data_input_path)
## Rows: 12200 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): subject_id, trial_type, trial_type_order, itemA, itemB, categoryA, ...
## dbl (9): trial_index, question_index, response, correct_response, original_t...
## lgl (2): attention_check, correct
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

1.4 Reshape data

Create separate data frames for Sorting Task, Causal Task and a combined data frame

df_causal <- d %>%
  filter(trial_type == "causation", !attention_check, itemA != itemB) %>%
  select(all_of(causation_columns)) %>%
  
  #normalize causal distances and flip scale
  mutate(
    causal_distance = (1 - response / 100 - min(1 - response / 100, na.rm = TRUE)) /
                            (max(1 - response / 100, na.rm = TRUE) - min(1 - response / 100, na.rm = TRUE))
  ) %>%
  select(subject_id, itemA, itemB, causal_distance) %>% 
  mutate(domain_itemA = case_when(itemA == "hear something" ~ "mind", #labels for first item of pair
                             itemA == "choose what to do" ~ "mind",
                             itemA == "remember something" ~ "mind",
                             itemA == "think about something" ~ "mind",
                             itemA == "see something" ~ "mind",
                             itemA == "reach for something" ~ "action",
                             itemA == "sit down" ~ "action", 
                             itemA == "jump up and down" ~ "action",
                             itemA == "kick something" ~ "action",
                             itemA == "take a walk" ~ "action",
                             itemA == "get tired" ~ "bio", 
                             itemA == "become hungry" ~ "bio",
                             itemA == "feel scared"~ "bio",
                             itemA == "experience pain"~ "bio",
                             itemA == "get sick"~ "bio"),
         domain_itemB = case_when(itemB == "hear something" ~ "mind", 
                               itemB == "choose what to do" ~ "mind",
                               itemB == "remember something" ~ "mind",
                               itemB == "think about something" ~ "mind",
                               itemB == "see something" ~ "mind",
                               itemB == "reach for something" ~ "action",
                               itemB == "sit down" ~ "action", 
                               itemB == "jump up and down" ~ "action",
                               itemB == "kick something" ~ "action",
                               itemB == "take a walk" ~ "action",
                               itemB == "get tired" ~ "bio", 
                               itemB == "become hungry" ~ "bio",
                               itemB == "feel scared"~ "bio",
                               itemB == "experience pain"~ "bio",
                               itemB == "get sick"~ "bio")) %>% 
  relocate(domain_itemA, .after = "itemB") %>% 
  relocate(domain_itemB, .after = "domain_itemA") %>% 
  mutate(itemA = factor(itemA, levels = desired_order),
         itemB = factor(itemB, levels = desired_order)) 


df_freesort <- d %>%
  filter(trial_type == "free_sort") %>%
  select(all_of(freesort_columns_with_trial_order)) %>%
  rename(item_freesort = item) %>%
  
  #expand pairs within each subject
  group_by(subject_id, trial_type_order) %>%
  summarize(pairs = list(expand.grid(itemA = item_freesort, itemB = item_freesort, stringsAsFactors = FALSE)), .groups = "drop") %>%
  unnest(pairs) %>%
  filter(itemA != itemB) %>%
  
  #join in freesort coordinates
  left_join(d %>% filter(trial_type == "free_sort") %>%
              select(subject_id, itemA = item, x, y),
            by = c("subject_id", "itemA")) %>%
  rename(x_itemA = x, y_itemA = y) %>%
  
  left_join(d %>% filter(trial_type == "free_sort") %>%
              select(subject_id, itemB = item, x, y),
            by = c("subject_id", "itemB")) %>%
  rename(x_itemB = x, y_itemB = y) %>%
  
  group_by(subject_id) %>% 
  
  #compute and normalize Euclidean distances
  mutate(
    euclidean_distance = sqrt((x_itemA - x_itemB)^2 + (y_itemA - y_itemB)^2),
    freesort_distance = round( (euclidean_distance - min(euclidean_distance, na.rm = TRUE)) /
                    (max(euclidean_distance, na.rm = TRUE) - min(euclidean_distance, na.rm = TRUE)),2),
    
    trial_type_order = ifelse(trial_type_order == "first", "causation_second", "causation_first"),
    
    itemA = factor(itemA, levels = desired_order),
    itemB = factor(itemB, levels = desired_order)) %>%
  select(subject_id, itemA, itemB, freesort_distance, trial_type_order) %>%
  ungroup() %>% 
  mutate(domain_itemA = case_when(itemA == "hear something" ~ "mind", #labels for first item of pair
                             itemA == "choose what to do" ~ "mind",
                             itemA == "remember something" ~ "mind",
                             itemA == "think about something" ~ "mind",
                             itemA == "see something" ~ "mind",
                             itemA == "reach for something" ~ "action",
                             itemA == "sit down" ~ "action", 
                             itemA == "jump up and down" ~ "action",
                             itemA == "kick something" ~ "action",
                             itemA == "take a walk" ~ "action",
                             itemA == "get tired" ~ "bio", 
                             itemA == "become hungry" ~ "bio",
                             itemA == "feel scared"~ "bio",
                             itemA == "experience pain"~ "bio",
                             itemA == "get sick"~ "bio"),
         domain_itemB = case_when(itemB == "hear something" ~ "mind", 
                               itemB == "choose what to do" ~ "mind",
                               itemB == "remember something" ~ "mind",
                               itemB == "think about something" ~ "mind",
                               itemB == "see something" ~ "mind",
                               itemB == "reach for something" ~ "action",
                               itemB == "sit down" ~ "action", 
                               itemB == "jump up and down" ~ "action",
                               itemB == "kick something" ~ "action",
                               itemB == "take a walk" ~ "action",
                               itemB == "get tired" ~ "bio", 
                               itemB == "become hungry" ~ "bio",
                               itemB == "feel scared"~ "bio",
                               itemB == "experience pain"~ "bio",
                               itemB == "get sick"~ "bio")) 

df_combined <- df_freesort %>%
  # left_join(df_causal, by = c("subject_id", "itemA", "itemB")) # %>% 
  left_join(df_causal, by = c("subject_id", "itemA", "itemB", "domain_itemA", "domain_itemB")) %>%
  mutate(domain_itemA = factor(domain_itemA, levels = domain_order),
         domain_itemB = factor(domain_itemB, levels = domain_order)) %>% 
  relocate(freesort_distance, .before = "causal_distance") %>%
  relocate(trial_type_order, .after = "domain_itemB")

paged_table(df_combined)
write.csv(df_combined, data_processed_output_path, row.names = F)

2 Freesort RDMs

2.1 plot subject-specific raw responses

d %>% 
  filter(trial_type == "free_sort") %>%
  # filter(subject_id == "subj_03") %>% 
  mutate(y = 500 - y) %>% #invert y axis
  ggplot(aes(x = x, y = y)) +
  geom_point() +
  geom_text(aes(label = item), size = 3, vjust = -.2) +
  facet_wrap(~subject_id)

2.2 plot average raw responses

# pdf("my_plot_average.pdf", width = 7, height = 4)  # Width and height are in inches
d %>% 
  filter(trial_type == "free_sort") %>%
  mutate(y = 500 - y) %>% 
  group_by(item) %>% 
  mutate(mean_x = mean(x),
            mean_y = mean(y)) %>% 
  ungroup() %>% 
  distinct(item, .keep_all = TRUE) %>% 
  select(-subject_id) %>% 
  ggplot(aes(x = mean_x, y = mean_y)) +
  geom_point() +
  geom_text(aes(label = item), vjust = -1) +
  labs(title = "Average Raw Responses")

2.3 plot subject-specific Sorting RDMs

plot_rdm <- function(data, subject, desired_order) {
  df_matrix <- data %>%
    filter(subject_id == subject) %>%
    select(-subject_id) %>%
    select(itemA, itemB, freesort_distance) %>% 
    pivot_wider(names_from = itemB, values_from = freesort_distance) %>%
    column_to_rownames("itemA")
  
  df_matrix <- df_matrix[desired_order, desired_order]
  diag(df_matrix) <- 0 
  
  custom_palette <- colorRampPalette(c("red", "white", "blue"))(200)  # Generate 200 color levels

  corrplot(as.matrix(df_matrix), method = "color", is.corr = FALSE,
    col = custom_palette,  # Apply custom colors
    tl.col = "black", title = paste("Freesort RDM - ", subject),
    mar = c(0, 0, 2, 0), addgrid.col = "darkgray"
  )
}

subject_ids <- unique(df_freesort$subject_id)

#generate plots
# plots <- map(subject_ids, ~plot_rdm(df_freesort, .x, desired_order)) #run this to visualize plots in markdown  output

#save plots with subject name as file name
new_names <- paste0("subj_", str_pad(seq_along(subject_ids), width = 2, pad = "0")) 

#loop to save each plot as a separate PDF
for (i in seq_along(subject_ids)) {  
  pdf(file = paste0(base_path, "/figures/freesort/", new_names[i], ".pdf"), width = 8, height = 6)  #open a PDF device
  plot_rdm(df_freesort, subject_ids[i], desired_order)  
  dev.off()  #close the device to save the file
}

2.4 figure for paper: plot average freesort rdm

df_mean_freesort <- df_freesort %>% 
  unite("item_pairs", itemA, itemB, sep = "_", remove = FALSE) %>% 
  group_by(item_pairs) %>% 
  mutate(mean_freesort_distance = mean(freesort_distance)) %>% 
  # distinct(item_pairs, .keep_all = TRUE) %>% 
  ungroup()  #%>% 
  # select(itemA, itemB, mean_freesort_distance)  %>% 
  # mutate(itemA = factor(itemA, levels = desired_order),
  #        itemB = factor(itemB, levels = desired_order))
  
# matrixify and plot
df_mean_freesort_rdm <- df_mean_freesort %>% 
  distinct(item_pairs, .keep_all = TRUE) %>% 
  select(itemA, itemB, mean_freesort_distance) %>% 
  pivot_wider(names_from = itemB, values_from = mean_freesort_distance) %>% 
  column_to_rownames("itemA")  
  
df_mean_freesort_rdm <- df_mean_freesort_rdm[desired_order, desired_order]

diag(df_mean_freesort_rdm) <- NA #set diagonals 

#min max normalize
df_mean_freesort_matrix <- as.matrix(df_mean_freesort_rdm) 
min_val <- min(df_mean_freesort_matrix, na.rm = TRUE) 
max_val <- max(df_mean_freesort_matrix, na.rm = TRUE) 
df_mean_freesort_matrix <- (df_mean_freesort_matrix - min_val) / (max_val - min_val) 

#group_colors <- c("#2F2585", "#69ad44", "#f36b2d") 
group_colors <- c("#db9728", "#d0495a", "#00788a") 

#map each item to a color based on its group
item_colors <- setNames(c(rep(group_colors[1], 5), 
                          rep(group_colors[2], 5), 
                          rep(group_colors[3], 5)), 
                        desired_order)

#ensure colors align with the matrix order
label_colors <- item_colors[rownames(df_mean_freesort_matrix)]

custom_palette <- colorRampPalette(c("red", "white", "blue")) 

pdf(file = paste0(rdms_outputs_path, "/freesort/averageFreesortRDM.pdf"), width = 12, height = 12)

#large image
corrplot(
  df_mean_freesort_matrix,
  method = "color",
  is.corr = FALSE,
  col = custom_palette(200),
  #tl.col = "black", #could be label_colors
  tl.col = label_colors, #could be label_colors
  tl.cex = 2,
  na.label = "square",       
  na.label.col = "darkgrey",   
  #title = "Average Freesort RDM",
  mar = c(0, 0, 2, 0),
  addgrid.col = "darkgray",
  cl.offset = 1.0,  # Push labels even further
  cl.cex = 2,     # Slightly smaller label text
  cl.ratio = 0.22,   # Widen the legend bar. Change this to push words far away
  cex.main = 3
)

#run this with the above to include legend for the domain colors
legend(
  "topleft",
  inset = c(0.1, 0.1), #move the legend outside the plot
  legend = c("Mind", "Action", "Body"),
  fill = group_colors,
  border = NA,
  bty = "n", #no box around the legend
  cex = 2, #text size
  xpd = TRUE, #allow drawing outside the plot area
  #ncol = 1,
  y.intersp = 1.4 #increase this for more space between rows
)

2.5 Specific similarity relations reported in paper

compute_ci_freesort <- function(df, condition_label) {
  mean_distance <- mean(df$freesort_distance, na.rm = TRUE)
  sem <- sd(df$freesort_distance, na.rm = TRUE) / sqrt(nrow(df))
  ci_lower <- mean_distance - qt(0.975, df = nrow(df) - 1) * sem
  ci_upper <- mean_distance + qt(0.975, df = nrow(df) - 1) * sem

  cat(sprintf("%s: mean distance = %.3f [95%% CI: %.3f, %.3f]\n", 
              condition_label, mean_distance, ci_lower, ci_upper))
}

#compute and print within domain similarities
compute_ci_freesort(df_freesort %>% filter(domain_itemA == "mind", domain_itemB == "mind"), "mind → mind")
## mind → mind: mean distance = 0.355 [95% CI: 0.338, 0.373]
compute_ci_freesort(df_freesort %>% filter(domain_itemA == "action", domain_itemB == "action"), "action → action")
## action → action: mean distance = 0.370 [95% CI: 0.351, 0.390]
compute_ci_freesort(df_freesort %>% filter(domain_itemA == "bio", domain_itemB == "bio"), "bio → bio")
## bio → bio: mean distance = 0.368 [95% CI: 0.350, 0.386]
#compute and print cross domain similarities
compute_ci_freesort(df_freesort %>% filter(domain_itemA == "action", domain_itemB == "mind"), "mind → mind")
## mind → mind: mean distance = 0.565 [95% CI: 0.551, 0.580]
compute_ci_freesort(df_freesort %>% filter(domain_itemA == "bio", domain_itemB == "mind"), "action → action")
## action → action: mean distance = 0.574 [95% CI: 0.561, 0.587]
compute_ci_freesort(df_freesort %>% filter(domain_itemA == "bio", domain_itemB == "action"), "bio → bio")
## bio → bio: mean distance = 0.582 [95% CI: 0.568, 0.596]
compute_ci_freesort(df_freesort %>% filter(domain_itemA == "mind", domain_itemB == "action"), "bio → bio")
## bio → bio: mean distance = 0.565 [95% CI: 0.551, 0.580]
compute_ci_freesort(df_freesort %>% filter(domain_itemA == "mind", domain_itemB == "bio"), "bio → bio")
## bio → bio: mean distance = 0.574 [95% CI: 0.561, 0.587]
compute_ci_freesort(df_freesort %>% filter(domain_itemA == "action", domain_itemB == "bio"), "bio → bio")
## bio → bio: mean distance = 0.582 [95% CI: 0.568, 0.596]
#average cross-domain similarity
(.565 + .574 + .582 + .565 + .574 + .583) / 6
## [1] 0.5738333
#average within domain similarity
(.355 + .37 + .368) / 3
## [1] 0.3643333

2.5.1 Plot only upper tri

This plot is not illustrated in the paper

pdf(file = paste0(rdms_outputs_path, "/freesort/averageFreesortRDM-uppertri.pdf"), width = 12, height = 12)

#large image
corrplot(
  as.matrix(df_mean_freesort_matrix),
  method = "color",
  is.corr = FALSE,
  col = custom_palette(200),
  tl.col = label_colors, #could be label_colors
  tl.cex = 2,
  na.label = "square",       
  na.label.col = "darkgrey",   
  #title = "Average Freesort RDM",
  mar = c(0, 0, 2, 0),
  addgrid.col = "darkgray",
  cl.offset = 1.0,  #push labels even further
  cl.cex = 2,     #slightly smaller label text
  cl.ratio = 0.22,   #widen the legend bar. Change this to push words far away
  cex.main = 3,
  type = "upper"  #show only upper triangular matrix
)

2.5.2 lower tri

This plot is not illustrated in the paper. Upper and lower tri identical since the matrix is symmetrical.

pdf(file = paste0(rdms_outputs_path, "/freesort/averageFreesortRDM-lowertri.pdf"), width = 12, height = 12)

#large image
corrplot(
  as.matrix(df_mean_freesort_matrix),
  method = "color",
  is.corr = FALSE,
  col = custom_palette(200),
  #tl.col = "black", #could be label_colors
  # tl.col = label_colors, #could be label_colors
  # tl.cex = 2,
  tl.pos = "n",
  na.label = "square",       
  na.label.col = "darkgrey",   
  #title = "Freesort RDM lower tr",
  mar = c(0, 0, 2, 0),
  addgrid.col = "darkgray",
  cl.offset = 1.0,  #push labels even further
  cl.cex = 2,     #slightly smaller label text
  cl.ratio = 0.22,   #widen the legend bar. Change this to push words far away
  cex.main = 3,
  type = "lower"  #show only lower triangular matrix
)

3 Causal RDMs

3.1 plot subject-specific causal RDMs

plot_causal_rdm <- function(data, subject, desired_order) {
  
  df_matrix <- data %>%
    filter(subject_id == subject) %>%
    select(-subject_id) %>%
    select(itemA, itemB, causal_distance) %>% 
    pivot_wider(names_from = itemB, values_from = causal_distance) %>%
    column_to_rownames("itemA")
  
  df_matrix <- df_matrix[desired_order, desired_order]
  diag(df_matrix) <- 0
  
  custom_palette <- colorRampPalette(c("red", "white", "blue"))(200)

  
  corrplot(as.matrix(df_matrix), method = "color", is.corr = FALSE,
               col = custom_palette,  # Apply custom colors
           tl.col = "black", title = paste("Causal RDM - ", subject),
           mar = c(0, 0, 2, 0), addgrid.col = "darkgray")
}

subject_ids <- unique(df_causal$subject_id)

#generate plots
# plots <- map(subject_ids, ~plot_causal_rdm(df_causal, .x, desired_order)) #run this to generate plots

# save plots with subject name as file name
new_names <- paste0("subj_", str_pad(seq_along(subject_ids), width = 2, pad = "0"))

for (i in seq_along(subject_ids)) { 

    pdf(file = paste0(base_path, "/figures/causal/", new_names[i], ".pdf"), width = 8, height = 6)
  plot_causal_rdm(df_causal, subject_ids[i], desired_order)  
  dev.off()  
}

3.2 figure for paper: plot average causal RDM

df_mean_causal <- df_causal %>% 
  mutate(item_pairs = str_c(itemA, itemB, sep = "-"))  %>%
  group_by(item_pairs) %>%
  mutate(mean_causal_distance = mean(causal_distance, na.rm = TRUE)) %>%  #average across participants
  ungroup() 

df_mean_causal_rdm <- df_mean_causal %>%
  distinct(item_pairs, .keep_all = TRUE) %>% 
  select(itemA, itemB, mean_causal_distance) %>% 
  pivot_wider(names_from = itemB, values_from = mean_causal_distance) %>%
  column_to_rownames("itemA")  

df_mean_causal_rdm <- df_mean_causal_rdm[desired_order, desired_order]

diag(df_mean_causal_rdm) <- NA 

#min max normalize
df_mean_causal_matrix <- as.matrix(df_mean_causal_rdm) 
min_val <- min(df_mean_causal_matrix, na.rm = TRUE) 
max_val <- max(df_mean_causal_matrix, na.rm = TRUE) 
df_mean_causal_matrix <- (df_mean_causal_matrix - min_val) / (max_val - min_val) 

#colors
group_colors <- c("#db9728", "#d0495a", "#00788a") 


item_colors <- setNames(c(rep(group_colors[1], 5), #mind
                          rep(group_colors[2], 5), #action
                          rep(group_colors[3], 5)), #body
                        desired_order)

label_colors <- item_colors[rownames(df_mean_causal_rdm)]

custom_palette <- colorRampPalette(c("red", "white", "blue")) 

pdf(file = paste0(rdms_outputs_path, "/causal/averageCausalRDM.pdf"), width = 12, height = 12)

#large image
corrplot(
  df_mean_causal_matrix,
  method = "color",
  is.corr = FALSE,
  col = custom_palette(200),
  #tl.col = "black", 
  tl.col = label_colors, 
  tl.cex = 2,
  mar = c(0, 0, 2, 0),
  addgrid.col = "darkgray",
  na.label = "square",       
  na.label.col = "darkgrey",
  cl.offset = 1.0, 
  cl.cex = 2, #label text
  cl.ratio = 0.22, #push words far away
  cex.main = 3
)

#run this together with above to include a domains color legend
legend(
  "topleft",
  inset = c(0.1, 0.1),
  legend = c("Mind", "Action", "Body"),
  fill = group_colors,
  border = NA,
  bty = "n",
  cex = 2, #text size
  xpd = TRUE, #draw outside the plot area
  #ncol = 1,
  y.intersp = 1.4 #more space between rows
)

3.3 Specific causal relations reported in paper

compute_ci <- function(df, condition_label) {
  mean_distance <- mean(df$causal_distance, na.rm = TRUE)
  sem <- sd(df$causal_distance, na.rm = TRUE) / sqrt(nrow(df))
  ci_lower <- mean_distance - qt(0.975, df = nrow(df) - 1) * sem
  ci_upper <- mean_distance + qt(0.975, df = nrow(df) - 1) * sem

  cat(sprintf("%s: mean distance = %.3f [95%% CI: %.3f, %.3f]\n", 
              condition_label, mean_distance, ci_lower, ci_upper))
}


#defining and computing each condition
compute_ci(df_causal %>% filter(domain_itemA == "mind", domain_itemB == "action"), "mind → action")
## mind → action: mean distance = 0.323 [95% CI: 0.309, 0.337]
compute_ci(df_causal %>% filter(domain_itemA == "action", domain_itemB == "mind"), "action → mind")
## action → mind: mean distance = 0.415 [95% CI: 0.399, 0.431]
compute_ci(df_causal %>% filter(domain_itemA == "action", domain_itemB == "action"), "action → action")
## action → action: mean distance = 0.611 [95% CI: 0.592, 0.630]
compute_ci(df_causal %>% filter(domain_itemA == "bio", domain_itemB == "bio"), "body → body")
## body → body: mean distance = 0.381 [95% CI: 0.362, 0.400]
compute_ci(df_causal %>% filter(domain_itemA == "bio", domain_itemB == "action"), "body → action")
## body → action: mean distance = 0.442 [95% CI: 0.424, 0.460]
compute_ci(df_causal %>% filter(domain_itemA == "action", domain_itemB == "bio"), "action → body")
## action → body: mean distance = 0.427 [95% CI: 0.410, 0.445]
#Perception → Cognition
compute_ci(df_causal %>% 
             filter(domain_itemA == "mind", domain_itemB == "mind", 
                    itemA %in% c("hear something", "see something"),
                    itemB %in% c("choose what to do", "think about something", "remember something")),
           "perception → cognition")
## perception → cognition: mean distance = 0.149 [95% CI: 0.130, 0.167]
#Cognition → Perception
compute_ci(df_causal %>% 
             filter(domain_itemA == "mind", domain_itemB == "mind", 
                    itemB %in% c("hear something", "see something"),
                    itemA %in% c("choose what to do", "think about something", "remember something")),
           "cognition → perception")
## cognition → perception: mean distance = 0.477 [95% CI: 0.442, 0.512]
#Seeing → Sick
compute_ci(df_causal %>% filter(itemA == "see something", itemB == "get sick"), "seeing → sick")
## seeing → sick: mean distance = 0.390 [95% CI: 0.300, 0.481]
#Hearing → Sick
compute_ci(df_causal %>% filter(itemA == "hear something", itemB == "get sick"), "hearing → sick")
## hearing → sick: mean distance = 0.606 [95% CI: 0.514, 0.697]

4 Correlations: Sorting vs Causal Task

4.1 mean within-subject correlation

compute_subjectwise_rdm_correlation <- function(df_causal, df_freesort, desired_order, metric = "kendall") {
  subject_ids <- unique(df_causal$subject_id)
  subject_correlations <- numeric(length(subject_ids))
  
  for (i in seq_along(subject_ids)) {
    subject <- subject_ids[i]
    
    freesort_rdm <- df_freesort %>%
      filter(subject_id == subject) %>%
      select(itemA, itemB, freesort_distance) %>%
      pivot_wider(names_from = itemB, values_from = freesort_distance) %>%
      column_to_rownames("itemA") %>%
      as.matrix()
      diag(freesort_rdm) <- NA
    freesort_rdm <- freesort_rdm[desired_order, desired_order]
    
    causal_rdm <- df_causal %>%
      filter(subject_id == subject) %>%
      select(itemA, itemB, causal_distance) %>%
      pivot_wider(names_from = itemB, values_from = causal_distance) %>%
      column_to_rownames("itemA") %>%
      as.matrix()
        diag(causal_rdm) <- NA
    causal_rdm <- causal_rdm[desired_order, desired_order]
    
    freesort_flat <- as.numeric(freesort_rdm)
    causal_flat <- as.numeric(causal_rdm)
    
    subject_correlations[i] <- cor(freesort_flat, causal_flat, method = metric, use = "complete.obs")
  }
  
  return(subject_correlations)
}

subject_correlations <- compute_subjectwise_rdm_correlation(df_causal, df_freesort, desired_order)
hist(subject_correlations)

mean_correlation <- mean(subject_correlations, na.rm = TRUE)
mean_correlation
## [1] 0.03333567

4.2 t-test

t_test_result <- t.test(subject_correlations, mu = 0)
t_test_result
## 
##  One Sample t-test
## 
## data:  subject_correlations
## t = 3.7597, df = 49, p-value = 0.0004539
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.01551768 0.05115366
## sample estimates:
##  mean of x 
## 0.03333567

4.3 t-test assumptions

seem fine, so no need for perm test

shapiro_result <- shapiro.test(subject_correlations)
print(shapiro_result)
## 
##  Shapiro-Wilk normality test
## 
## data:  subject_correlations
## W = 0.9804, p-value = 0.5689
qqnorm(subject_correlations)
qqline(subject_correlations, col = "red")

hist(subject_correlations, main = "Histogram of Subject Correlations", 
     xlab = "Correlation", col = "lightblue", border = "black")

4.4 Correlation between mean rdms

Note that this shows mean rdms are uncorrelated, while previously, we have seen a small positive correlation when we run subject-level correlations

#using Pearson's correlation (non-significant)
mean_rdm_correlations <- cor(as.numeric(df_mean_freesort_matrix), 
                             as.numeric(df_mean_causal_matrix), 
                             method = "pearson", use = "complete.obs")
cor.test(as.numeric(df_mean_freesort_matrix), 
         as.numeric(df_mean_causal_matrix), 
         method = "pearson", 
         use = "complete.obs")
## 
##  Pearson's product-moment correlation
## 
## data:  as.numeric(df_mean_freesort_matrix) and as.numeric(df_mean_causal_matrix)
## t = 0.78819, df = 208, p-value = 0.4315
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08142201  0.18856724
## sample estimates:
##        cor 
## 0.05456999
#using Kendall's correlation (also non-significant)
n_permutations <- 10000
observed_tau <- cor(as.numeric(df_mean_freesort_matrix),
                    as.numeric(df_mean_causal_matrix),
                    method = "kendall",
                    use = "complete.obs")
observed_tau 
## [1] 0.01800242
null_dist <- replicate(n_permutations, {
  permuted <- sample(as.numeric(df_mean_causal_matrix))
  cor(as.numeric(df_mean_freesort_matrix), permuted, method = "kendall", use = "complete.obs")
})
p_value <- mean(null_dist >= observed_tau)  #one-sided
p_value
## [1] 0.3511

5 Correlations: Data vs Models

5.1 General functions for correlation plot

extract_upper_tri <- function(matrix) {
  matrix[upper.tri(matrix)]
}

compute_rdm_correlation <- function(data, emp_col, theor_col, desired_order, method = "kendall") {
  rdm1 <- data %>%
    select(itemA, itemB, {{ emp_col }}) %>%
    pivot_wider(names_from = itemB, values_from = {{ emp_col }}) %>%
    column_to_rownames("itemA") %>%
    as.matrix()
  
  rdm2 <- data %>%
    select(itemA, itemB, {{ theor_col }}) %>%
    pivot_wider(names_from = itemB, values_from = {{ theor_col }}) %>%
    column_to_rownames("itemA") %>%
    as.matrix()
  
  rdm1 <- rdm1[desired_order, desired_order]
  rdm2 <- rdm2[desired_order, desired_order]
  
  diag(rdm1) <- NA
  diag(rdm2) <- NA
  
  upper_rdm1 <- extract_upper_tri(rdm1)
  upper_rdm2 <- extract_upper_tri(rdm2)
  
  cor(upper_rdm1, upper_rdm2, method = method, use = "complete.obs")
}

5.2 noise ceiling functions

calculate_freesort_noise_ceiling <- function(data, subjects, desired_order, metric = "kendall") {
  # Initialize storage for correlations
  upper_bound <- numeric(length(subjects))
  lower_bound <- numeric(length(subjects))
  
  for (i in seq_along(subjects)) {
    subject <- subjects[i]
    
    #Subject's RDM
    subject_rdm <- data %>%
        filter(subject_id == subject) %>%
        select(subject_id, itemA, itemB, freesort_distance) %>% 
        pivot_wider(names_from = itemB, values_from = freesort_distance) %>%
        column_to_rownames("itemA") %>%
        as.matrix()
    subject_rdm <- subject_rdm[desired_order, desired_order]
    
    #Grand average RDM
    grand_avg_rdm <- data %>%
        select(subject_id, itemA, itemB, freesort_distance) %>% 
        unite("item_pairs", itemA, itemB, sep = "_", remove = FALSE) %>% 
        group_by(item_pairs) %>%
        mutate(freesort_distance = mean(freesort_distance)) %>%
        ungroup() %>% 
        distinct(item_pairs, .keep_all = TRUE) %>% 
        select(-c(item_pairs, subject_id)) %>% 
        pivot_wider(names_from = itemB, values_from = freesort_distance) %>%
        column_to_rownames("itemA") %>%
        as.matrix()
    grand_avg_rdm <- grand_avg_rdm[desired_order, desired_order]
    
    #Other subjects' average RDM
    other_avg_rdm <- data %>%
      select(subject_id, itemA, itemB, freesort_distance) %>% 
      filter(subject_id != "subj_01") %>%
      unite("item_pairs", itemA, itemB, sep = "_", remove = FALSE) %>% 
      group_by(item_pairs) %>%
      mutate(freesort_distance = mean(freesort_distance)) %>%
      ungroup() %>% 
      distinct(item_pairs, .keep_all = TRUE) %>% 
      select(-c(item_pairs, subject_id)) %>% 
      pivot_wider(names_from = itemB, values_from = freesort_distance) %>%
      column_to_rownames("itemA") %>%
      as.matrix()
    other_avg_rdm <- other_avg_rdm[desired_order, desired_order]
    
    subject_upper <- as.numeric(subject_rdm[upper.tri(subject_rdm)])
    other_upper <- as.numeric(other_avg_rdm[upper.tri(other_avg_rdm)])
    grand_upper <- as.numeric(grand_avg_rdm[upper.tri(grand_avg_rdm)])
    
    upper_bound[i] <- cor(subject_upper, other_upper, method = metric)
    lower_bound[i] <- cor(subject_upper, grand_upper, method = metric)
  }
  
  # Return average bounds
  list(
    upper_bound = mean(upper_bound, na.rm = TRUE),
    lower_bound = mean(lower_bound, na.rm = TRUE)
  )
}

calculate_causal_noise_ceiling <- function(data, subjects, desired_order, metric = "kendall") {
  #Initialize storage for correlations
  upper_tri_upper_bound <- numeric(length(subjects))
  upper_tri_lower_bound <- numeric(length(subjects))
  
  lower_tri_upper_bound <- numeric(length(subjects))
  lower_tri_lower_bound <- numeric(length(subjects))
  
  #Loop through each subject
  for (i in seq_along(subjects)) {
    subject <- subjects[i]
    
    #subject's RDM
    subject_rdm <- data %>%
        filter(subject_id == subject) %>%
        select(subject_id, itemA, itemB, causal_distance) %>% 
        pivot_wider(names_from = itemB, values_from = causal_distance) %>%
        column_to_rownames("itemA") %>%
        as.matrix()
    subject_rdm <- subject_rdm[desired_order, desired_order]
    
    #grand average RDM
    grand_avg_rdm <- data %>%
        select(subject_id, itemA, itemB, causal_distance) %>% 
        unite("item_pairs", itemA, itemB, sep = "_", remove = FALSE) %>% 
        group_by(item_pairs) %>%
        mutate(causal_distance = mean(causal_distance)) %>%
        ungroup() %>% 
        distinct(item_pairs, .keep_all = TRUE) %>% 
        select(-c(item_pairs, subject_id)) %>% 
        pivot_wider(names_from = itemB, values_from = causal_distance) %>%
        column_to_rownames("itemA") %>%
        as.matrix()
    grand_avg_rdm <- grand_avg_rdm[desired_order, desired_order]
    
    #other subjects' average RDM
    other_avg_rdm <- data %>%
      select(subject_id, itemA, itemB, causal_distance) %>% 
      filter(subject_id != "subj_01") %>%
      unite("item_pairs", itemA, itemB, sep = "_", remove = FALSE) %>% 
      group_by(item_pairs) %>%
      mutate(causal_distance = mean(causal_distance)) %>%
      ungroup() %>% 
      distinct(item_pairs, .keep_all = TRUE) %>% 
      select(-c(item_pairs, subject_id)) %>% 
      pivot_wider(names_from = itemB, values_from = causal_distance) %>%
      column_to_rownames("itemA") %>%
      as.matrix()
    other_avg_rdm <- other_avg_rdm[desired_order, desired_order]
    
    #upper tri noise ceiling
    subject_upper <- as.numeric(subject_rdm[upper.tri(subject_rdm)])
    other_upper <- as.numeric(other_avg_rdm[upper.tri(other_avg_rdm)])
    grand_upper <- as.numeric(grand_avg_rdm[upper.tri(grand_avg_rdm)])

    upper_tri_upper_bound[i] <- cor(subject_upper, other_upper, method = metric)
    upper_tri_lower_bound[i] <- cor(subject_upper, grand_upper, method = metric)

    #lower tri noise ceiling
    subject_lower <- as.numeric(subject_rdm[lower.tri(subject_rdm)])
    other_lower <- as.numeric(other_avg_rdm[lower.tri(other_avg_rdm)])
    grand_lower <- as.numeric(grand_avg_rdm[lower.tri(grand_avg_rdm)])
    
    lower_tri_upper_bound[i] <- cor(subject_lower, other_lower, method = metric)
    lower_tri_lower_bound[i] <- cor(subject_lower, grand_lower, method = metric)

  }
  
  #return average bounds
  list(
    upper_tri_upper_bound = mean(upper_tri_upper_bound, na.rm = TRUE),
    upper_tri_lower_bound = mean(upper_tri_lower_bound, na.rm = TRUE),
    
    lower_tri_upper_bound = mean(lower_tri_upper_bound, na.rm = TRUE),
    lower_tri_lower_bound = mean(lower_tri_lower_bound, na.rm = TRUE)
  )
}

5.3 Calculate Sorting Task noise ceiling

#create correlations data frame
d_model_predictions <- read_csv(model_predictions_input_path)
## Rows: 210 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): itemA, itemB
## dbl (4): theor1_Mind-Body-Action, theor2_PerCog-OdActSpAct-BodyStimBodynoSti...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
d_freesort_correlations <- df_combined %>%
  left_join(d_model_predictions, by = c("itemA", "itemB")) %>%
  group_by(subject_id) %>%
  mutate(
    `1_Mind-Body-Action` = compute_rdm_correlation(pick(everything()), freesort_distance, `theor1_Mind-Body-Action`, desired_order),
    `2_PerCog-ObjDir-Stimulus` = compute_rdm_correlation(pick(everything()), freesort_distance, `theor2_PerCog-OdActSpAct-BodyStimBodynoStim`, desired_order),
    `3_Physical-Ethereal` = compute_rdm_correlation(pick(everything()), freesort_distance, `theor3_Physical-Ethereal`, desired_order),
    `4_cosine_similarity` = compute_rdm_correlation(pick(everything()), freesort_distance, `theor4_cosine_similarity`, desired_order),
    `5_causal_judgments` = compute_rdm_correlation(pick(everything()), freesort_distance, `causal_distance`, desired_order)
  ) %>%
  ungroup() %>%
  relocate(causal_distance, .after = "freesort_distance")

subjects_freesort <- unique(d_freesort_correlations$subject_id)
noise_ceiling_sorting <- calculate_freesort_noise_ceiling(d_freesort_correlations, subjects_freesort, desired_order)
noise_ceiling_sorting
## $upper_bound
## [1] 0.2589605
## 
## $lower_bound
## [1] 0.2587916

5.4 Calculate Causal Task noise ceiling

d_causal_correlations <-  df_combined %>%
  left_join(d_model_predictions, by = c("itemA", "itemB")) %>% 
  group_by(subject_id) %>%
  mutate(
    `1_Mind-Body-Action` = compute_rdm_correlation(pick(everything()), causal_distance, `theor1_Mind-Body-Action`, desired_order),
    `2_PerCog-ObjDir-Stimulus` = compute_rdm_correlation(pick(everything()), causal_distance, `theor2_PerCog-OdActSpAct-BodyStimBodynoStim`, desired_order),
    `3_Physical-Ethereal` = compute_rdm_correlation(pick(everything()), causal_distance, `theor3_Physical-Ethereal`, desired_order),
    `4_cosine_similarity` = compute_rdm_correlation(pick(everything()), causal_distance, `theor4_cosine_similarity`, desired_order),
    `5_causal_judgments` = compute_rdm_correlation(pick(everything()), causal_distance, `causal_distance`, desired_order)
  ) %>%
  ungroup() 

# Compute noise ceiling for all subjects
subjects_causal <- unique(d_causal_correlations$subject_id)
noise_ceiling_causal <- calculate_causal_noise_ceiling(d_causal_correlations,subjects_causal, desired_order)
noise_ceiling_causal
## $upper_tri_upper_bound
## [1] 0.4409539
## 
## $upper_tri_lower_bound
## [1] 0.4413385
## 
## $lower_tri_upper_bound
## [1] 0.4124777
## 
## $lower_tri_lower_bound
## [1] 0.4130283

5.5 figure: plot RSA corr between observed and model rdms

aggregated_freesort_correlations <- d_freesort_correlations %>% 
  rename("3 Category" = `1_Mind-Body-Action`,
         "6 Category" = `2_PerCog-ObjDir-Stimulus`,
         "2 Category" = `3_Physical-Ethereal`,
         "Cosine Similarity" = `4_cosine_similarity`) %>% 
  pivot_longer(
    cols = c("2 Category", "3 Category", "6 Category", "Cosine Similarity"),
    names_to = "correlation_type",
    values_to = "correlation_value"
  ) %>%
  group_by(subject_id, correlation_type) %>%
  summarize(mean_correlation = mean(correlation_value, na.rm = TRUE), .groups = "drop") %>% 
  mutate(method = "Sorting Task")

aggregated_causal_correlations <- d_causal_correlations %>% 
  rename("3 Category" = `1_Mind-Body-Action`,
         "6 Category" = `2_PerCog-ObjDir-Stimulus`,
         "2 Category" = `3_Physical-Ethereal`,
         "Cosine Similarity" = `4_cosine_similarity`) %>% 
  pivot_longer(
    cols = c("2 Category", "3 Category", "6 Category", "Cosine Similarity"),
    names_to = "correlation_type",
    values_to = "correlation_value"
  ) %>%
  group_by(subject_id, correlation_type) %>%
  summarize(mean_correlation = mean(correlation_value, na.rm = TRUE), .groups = "drop") %>% 
  mutate(method = "Causal Task")

#combine both corrs into one
d_combined_correlations <- bind_rows(aggregated_freesort_correlations, aggregated_causal_correlations) %>% 
  mutate(method = factor(method, levels = c("Sorting Task", "Causal Task"))) 

bar_plot <- ggplot(d_combined_correlations, 
                   aes(x = reorder(correlation_type, mean_correlation, FUN = function(x) -mean(x)), 
                        y = mean_correlation, fill = method)) +
  stat_summary(
    fun = "mean",
    geom = "bar",
    position = "dodge",  
    color = "black"
  ) +
  stat_summary(
    fun.data = mean_se,
    geom = "errorbar",
    width = 0.3,
    position = position_dodge(width = 0.9), 
    color = "black"
  ) +
  geom_point(aes(color = method), alpha = 0.3, size = 3, position = position_jitterdodge(jitter.width = 0.2)) +
  scale_fill_manual(values = c("Sorting Task" = "darkgray", "Causal Task" = "black")) + #bar fill
  scale_color_manual(values = c("Sorting Task" = "lightgray", "Causal Task" = "#4A4A4A")) + #point colors
  
  geom_hline(yintercept = noise_ceiling_sorting$upper_bound, color = "darkgray", linetype = "dashed", linewidth = 0.5) +
  geom_hline(yintercept = noise_ceiling_sorting$lower_bound, color = "darkgray", linetype = "dashed", linewidth = 0.5) +
  geom_hline(yintercept = noise_ceiling_causal$upper_tri_upper_bound, color = "black", linetype = "dashed", linewidth = 0.5) +
  geom_hline(yintercept = noise_ceiling_causal$upper_tri_lower_bound, color = "black", linetype = "dashed", linewidth = 0.5) +
  theme_bw() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 16),
    axis.ticks.length = unit(.25, "cm"),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 18),
    legend.position = "top",
    legend.title = element_blank(),
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5)  
  ) +
  theme(
    plot.margin = margin(t = 10, r = 10, b = 20, l = 20),
    plot.title = element_text(size = 20, face = "bold", hjust = 0.5)  # Adjust main title size
  ) +
  labs(
    y = "Kendall's Tau Correlations",
    title = "Comparison of Theoretical and Empirical RDM Correlations"
  )

# Load and create RDM images
rdm_2_category <- ggdraw() + draw_image(paste0(rdms_input_path, "/theory_4.pdf"), scale = 1.2)
rdm_3_category <- ggdraw() + draw_image(paste0(rdms_input_path, "/theory_2.pdf"), scale = 1.2)
rdm_6_category <- ggdraw() + draw_image(paste0(rdms_input_path, "/theory_3.pdf"), scale = 1.2)
rdm_cosine <- ggdraw() + draw_image(paste0(rdms_input_path, "/theory_1.pdf"), scale = 1.2)

rdm_row <- rdm_3_category + rdm_6_category + rdm_2_category + rdm_cosine +
  plot_layout(ncol = 4)

plot_correlations <- bar_plot / rdm_row + 
  plot_layout(heights = c(5, 2)) #adjusts height ratio

plot_correlations

ggsave(
  filename = "figures/rdm_correlation_plot.pdf",
  plot = plot_correlations,
  device = "pdf",
  #path = "path/to/directory",
  width = 12,
  height = 5,
  units = "in"
)

5.6 t-tests of correlations

This compared correlations to 0.

t_test_results <- d_combined_correlations %>%
  group_by(method, correlation_type) %>%
  dplyr::summarise(
    mean_mean_correlation = mean(mean_correlation, na.rm = TRUE),
    t_value = t.test(mean_correlation, mu = 0)$statistic,  
    p_value = t.test(mean_correlation, mu = 0)$p.value,
    .groups = "drop"
  )

print(t_test_results)
## # A tibble: 8 × 5
##   method       correlation_type  mean_mean_correlation t_value  p_value
##   <fct>        <chr>                             <dbl>   <dbl>    <dbl>
## 1 Sorting Task 2 Category                       0.144     6.00 2.35e- 7
## 2 Sorting Task 3 Category                       0.254     9.77 4.32e-13
## 3 Sorting Task 6 Category                       0.193    11.1  5.27e-15
## 4 Sorting Task Cosine Similarity                0.129     7.95 2.31e-10
## 5 Causal Task  2 Category                      -0.0823   -4.96 8.90e- 6
## 6 Causal Task  3 Category                      -0.0357   -2.93 5.10e- 3
## 7 Causal Task  6 Category                      -0.0569   -6.20 1.16e- 7
## 8 Causal Task  Cosine Similarity               -0.0797   -7.49 1.14e- 9

6 Linear models comparing the 4 hypotheses

6.1 Freesort data

Reported in main paper.

freesort_correlations <- d_freesort_correlations %>% 
  rename("mind_action_physiology_model" = `1_Mind-Body-Action`,
         "finegrained_mind_action_physiology_model" = `2_PerCog-ObjDir-Stimulus`,
         "physical_psychological_model" = `3_Physical-Ethereal`,
         "use_phrase_embeddings_model" = `4_cosine_similarity`) %>% 
  pivot_longer(
    cols = c("mind_action_physiology_model":"use_phrase_embeddings_model"),
    names_to = "RDM_type",
    values_to = "tau"
  ) %>%
  select(subject_id, itemA, itemB, domain_itemA, domain_itemB, trial_type_order, freesort_distance, causal_distance, RDM_type, tau) %>% 
  mutate(
    RDM_type = as.factor(RDM_type),  # Ensure categorical
    RDM_type = relevel(RDM_type, ref = "mind_action_physiology_model") 
  )

#Note: mind action physiology is reference model. Estiamtes are mean kendall's taus
model_freesort <- lmerTest::lmer(tau ~ RDM_type + (1 | subject_id), 
              data = freesort_correlations)

summary(model_freesort)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: tau ~ RDM_type + (1 | subject_id)
##    Data: freesort_correlations
## 
## REML criterion at convergence: -89453.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.47711 -0.61820 -0.07668  0.62741  3.03237 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  subject_id (Intercept) 0.01564  0.12505 
##  Residual               0.00689  0.08301 
## Number of obs: 42000, groups:  subject_id, 50
## 
## Fixed effects:
##                                                    Estimate Std. Error
## (Intercept)                                       2.538e-01  1.770e-02
## RDM_typefinegrained_mind_action_physiology_model -6.049e-02  1.146e-03
## RDM_typephysical_psychological_model             -1.103e-01  1.146e-03
## RDM_typeuse_phrase_embeddings_model              -1.248e-01  1.146e-03
##                                                          df t value Pr(>|t|)
## (Intercept)                                       4.915e+01   14.34   <2e-16
## RDM_typefinegrained_mind_action_physiology_model  4.195e+04  -52.80   <2e-16
## RDM_typephysical_psychological_model              4.195e+04  -96.25   <2e-16
## RDM_typeuse_phrase_embeddings_model               4.195e+04 -108.95   <2e-16
##                                                     
## (Intercept)                                      ***
## RDM_typefinegrained_mind_action_physiology_model ***
## RDM_typephysical_psychological_model             ***
## RDM_typeuse_phrase_embeddings_model              ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) RDM_____ RDM_t__
## RDM_typ____ -0.032                 
## RDM_typph__ -0.032  0.500          
## RDM_typs___ -0.032  0.500    0.500

6.1.1 Getting CIs

#for mind action and physiology model
d_target <- d_combined_correlations %>%
  filter(correlation_type == "3 Category", method == "Sorting Task")

t_test <- t.test(d_target$mean_correlation, mu = 0)

t_test$estimate # mean Kendall's tau
## mean of x 
## 0.2538288
t_test$conf.int 
## [1] 0.2016248 0.3060328
## attr(,"conf.level")
## [1] 0.95

6.1.2 Power analysis

Exploratory power analysis for planning follow-up studies (i.e. replication of study 1 and 2).

#library(pwr)

#power analysis with d

#one sample because comparing tau it to 0, and two sided because wanting to see if different from 0
#also 0.65 is lower bound estimate of d

#pwr.t.test(d = 0.65, power = 0.8, sig.level = 0.05, type = "one.sample", alternative = "two.sided")

#this is d from t(49) = 9.76, p = 4.51 x 10-13 (d is t over root n)
#pwr.t.test(d = 1.38, power = 0.8, sig.level = 0.05, type = "one.sample", alternative = "two.sided")

# power analysis with R
#pwr.r.test(r = 0.202, power = 0.8, sig.level = 0.05, alternative = "two.sided")

#using r = sin (.5 πτ) for 0.2016 lower bound of kendall's tau
#sin(.5 * pi * 0.2016)
#pwr.r.test(r = 0.311, power = 0.8, sig.level = 0.05)

#for regular estimate
#sin(.5 * pi * 0.252)
#pwr.r.test(r = 0.386, power = 0.8, sig.level = 0.05)

6.2 Causal data

Testing of the best model for the Freesort data (i.e. Mind, action and physiology is good or bad at describing the Causal data)

taus_causal <- d_causal_correlations %>% 
  rename("mind_action_physiology_model" = `1_Mind-Body-Action`,
         "finegrained_mind_action_physiology_model" = `2_PerCog-ObjDir-Stimulus`,
         "physical_psychological_model" = `3_Physical-Ethereal`,
         "use_phrase_embeddings_model" = `4_cosine_similarity`) %>% 
  pivot_longer(
    cols = c("mind_action_physiology_model":"use_phrase_embeddings_model"),
    names_to = "RDM_type",
    values_to = "tau"
  ) %>%
  filter(RDM_type == "mind_action_physiology_model") %>% 
  rename("causal" = tau) %>% 
  distinct(subject_id, .keep_all = TRUE) %>% 
  select(subject_id, causal)


taus_freesort <- d_freesort_correlations %>% 
  rename("mind_action_physiology_model" = `1_Mind-Body-Action`,
         "finegrained_mind_action_physiology_model" = `2_PerCog-ObjDir-Stimulus`,
         "physical_psychological_model" = `3_Physical-Ethereal`,
         "use_phrase_embeddings_model" = `4_cosine_similarity`) %>% 
  pivot_longer(
    cols = c("mind_action_physiology_model":"use_phrase_embeddings_model"),
    names_to = "RDM_type",
    values_to = "tau"
  ) %>%
  filter(RDM_type == "mind_action_physiology_model") %>% 
  rename("freesort" = tau) %>% 
  distinct(subject_id, .keep_all = TRUE) %>% 
  select(subject_id, freesort)

combined_taus <- taus_causal %>% 
  left_join(taus_freesort,
            by = "subject_id") %>% 
  pivot_longer(cols = c(causal, freesort),
               names_to = "task",
               values_to = "tau") %>% 
  mutate(
    task = as.factor(task),  
    task = relevel(task, ref = "freesort") 
  )
  
model_prediction2 <- lmerTest::lmer(tau ~ task + (1|subject_id), data = combined_taus)
summary(model_prediction2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: tau ~ task + (1 | subject_id)
##    Data: combined_taus
## 
## REML criterion at convergence: -95.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.16565 -0.55581  0.01977  0.50180  2.61279 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  subject_id (Intercept) 0.002424 0.04924 
##  Residual               0.018143 0.13470 
## Number of obs: 100, groups:  subject_id, 50
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  0.25383    0.02028 96.65704   12.52  < 2e-16 ***
## taskcausal  -0.28949    0.02694 49.00000  -10.75 1.75e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## taskcausal -0.664
str(taus_causal)
## tibble [50 × 2] (S3: tbl_df/tbl/data.frame)
##  $ subject_id: chr [1:50] "subj_01" "subj_02" "subj_03" "subj_04" ...
##  $ causal    : num [1:50] 0.0718 -0.0757 0.0109 -0.0184 -0.0531 ...
mean(taus_causal$causal)
## [1] -0.03565764

6.2.1 Power analyses

Exploratory power analysis for planning future studies.

# t_result <- t.test(taus_causal$causal, mu = 0) 
# 
# #compute Cohen's d
# t_value <- t_result$statistic
# n <- nrow(taus_causal)
# d <- as.numeric(t_value) / sqrt(n)
# 
# power_result <- pwr.t.test(d = d, power = 0.8, sig.level = 0.05, type = "one.sample")
# 
# list(
#   t_value = t_value,
#   cohen_d = d,
#   required_n_for_power = ceiling(power_result$n)
# )

6.2.2 Plot: compare Mind action physiology fit with freesort vs causal data

ggplot(combined_taus, aes(x = task, y = tau)) +
  geom_boxplot(outlier.shape = NA, fill = "lightgray") +
  geom_jitter(width = 0.2, alpha = 0.5) + 
  labs(title = "Comparison of Tau Values by Task",
       x = "Task",
       y = "Tau") +
  scale_fill_manual(values = c("Sorting Task" = "darkgray", "Causal Task" = "black")) + #bar fill
  scale_color_manual(values = c("Sorting Task" = "lightgray", "Causal Task" = "#4A4A4A")) +
  theme_bw()
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's colour values.

ggplot(combined_taus, aes(x = task, y = tau, fill = task)) +
  geom_boxplot(outlier.shape = NA, alpha = 0.6) +
  geom_jitter(width = 0.2, alpha = 0.5, aes(color = task)) + 
  scale_fill_manual(values = c("freesort" = "darkgray", "causal" = "black")) +  # Customize colors
  scale_color_manual(values = c("freesort" = "darkgray", "causal" = "black")) + # Match colors for points
  labs(title = "Comparison of Tau Values by Task",
       x = "Task",
       y = "Tau") +
  theme_bw() +
  theme(legend.position = "none",
        axis.text.x = element_text(size = 14),  # Increase x-axis numbers
        axis.text.y = element_text(size = 14), 
        axis.title.x = element_text(size = 16), 
        axis.title.y = element_text(size = 16)) 

7 Item Selection for study 2

7.1 Get freesort minus causal distances

Here we are computing freesort minus causal distances between choice item (causal/similar choice) and target items (itemB). These values will be used to select items for study 2 in the study 2 script.

#get mean distances by item pairs
df_pair_means <- df_combined %>% 
  select(subject_id, itemA, itemB, freesort_distance, causal_distance) %>% 
  unite("item_pairs", itemA, itemB, sep = "_", remove = FALSE) %>% 
  group_by(item_pairs) %>%
  summarize(mean_causal_distance = mean(causal_distance, na.rm = TRUE),
         mean_freesort_distance = mean(freesort_distance, na.rm = TRUE)) %>% 
  ungroup() %>% 
  separate(item_pairs, c("itemA", "itemB"), sep = "_")

#get itemB pair means 
# df_itemB_means <- df_pair_means %>% 
#   group_by(itemB) %>%
#   mutate(mean_itemB_causal_distance = mean(mean_causal_distance, na.rm = TRUE),
#          mean_itemB_freesort_distance = mean(mean_freesort_distance, na.rm = TRUE)) %>%
#   ungroup() 

#add domain labels
df_item_pair_means_domain <- df_pair_means %>%
  left_join(df_combined %>% 
              select(itemA, itemB, domain_itemA, domain_itemB) %>% 
              distinct(),
              by = c("itemA", "itemB")) %>% 
  relocate(domain_itemA, .after = "itemB") %>% 
  relocate(domain_itemB, .after = "domain_itemA") %>% 
  mutate(itemB = factor(itemB, levels = desired_order))

df_item_pair_means_domain_long <- df_item_pair_means_domain %>% 
  pivot_longer(
    cols = c("mean_causal_distance", "mean_freesort_distance"),
    names_to = "mean_type",
    values_to = "values"
  ) 

df_item_pair_means_freesort_minus_causal <- df_item_pair_means_domain_long %>% 
  pivot_wider(names_from = "mean_type",
              values_from = "values") %>% 
  mutate(freesort_minus_causal = mean_freesort_distance - mean_causal_distance)

write.csv(df_item_pair_means_freesort_minus_causal, freesort_minus_causal_distances_output_path, row.names = F)

7.2 Use fminusc values to select Items

#similar item (item with lowest freesort_minus_causal)
similar_items <- df_item_pair_means_freesort_minus_causal %>% 
  group_by(itemB) %>% 
  slice_min(freesort_minus_causal)
 
#causal item (item with highest freesort_minus_causal)
causal_items <- df_item_pair_means_freesort_minus_causal %>% 
  group_by(itemB) %>% 
  slice_max(freesort_minus_causal)

7.3 Create Table of Item distances

# pdf("plot-items.pdf", width = 12, height = 12)  # Width and height are in inches

#visualize
#similar item (item with lowest freesort_minus_causal)
# df_item_pair_means_freesort_minus_causal %>% 
#   group_by(itemB) %>% 
#   arrange(freesort_minus_causal) %>% 
#   ggplot(aes(itemB, freesort_minus_causal)) +
#   geom_point() +
#   geom_text(aes(label = itemA), nudge_x = 0.2, hjust = 0) +  # Moves text slightly to
#   facet_wrap(~itemB) +
#   theme(
#     #axis.text.x = element_text(angle = 90)
#     axis.text.x = element_blank()
#     )

#==========
arranged_items <- df_item_pair_means_freesort_minus_causal %>% 
  group_by(itemB) %>% 
  arrange(desc(freesort_minus_causal)) %>% 
  ungroup() %>% 
  arrange(itemB)

#Generate a unique color for each itemB
unique_items <- unique(arranged_items$itemB)
color_palette <- scales::hue_pal()(length(unique_items))  # Generate distinct colors
color_map <- setNames(color_palette, unique_items)

#Create the table with conditional formatting
arranged_items_table <- arranged_items %>% 
  gt() %>% 
  data_color(
    columns = itemB,  #apply color to this column
    colors = color_map
  )
## Warning: Since gt v0.9.0, the `colors` argument has been deprecated.
## • Please use the `palette` argument to define a color palette.
## This warning is displayed once every 8 hours.
#arranged_items_table #uncomment this to view item distance values

gtsave(arranged_items_table, "figures/item-selection-counterfactual-task.png")

8 Order effects

8.1 causal-causal vs freesort-freesort correlation

df_causal_first <- df_combined %>% 
  filter(trial_type_order == "causation_first")

df_freesort_first <- df_combined %>% 
  filter(trial_type_order == "causation_second")

# Getting causal RDMs
## causation-first group
df_causal_rdm_causation_first <- df_causal_first %>%
  mutate(item_pairs = str_c(itemA, itemB, sep = "-")) %>%
  group_by(item_pairs) %>%
  mutate(mean_causal = mean(causal_distance, na.rm = TRUE)) %>%
  ungroup() %>%
  distinct(item_pairs, .keep_all = TRUE) %>%
  select(itemA, itemB, mean_causal) %>%
  pivot_wider(names_from = itemB, values_from = mean_causal) %>%
  column_to_rownames("itemA")

df_causal_rdm_causation_first <- df_causal_rdm_causation_first[desired_order, desired_order]  #reorder rdm

## freesort-first group
df_causal_rdm_freesort_first <- df_freesort_first %>%
  mutate(item_pairs = str_c(itemA, itemB, sep = "-")) %>%
  group_by(item_pairs) %>%
  mutate(mean_causal = mean(causal_distance, na.rm = TRUE)) %>%
  ungroup() %>%
  distinct(item_pairs, .keep_all = TRUE) %>%
  select(itemA, itemB, mean_causal) %>%
  pivot_wider(names_from = itemB, values_from = mean_causal) %>%
  column_to_rownames("itemA")

df_causal_rdm_freesort_first <- df_causal_rdm_freesort_first[desired_order, desired_order]  #reorder rdm

# Getting freesort RDMs

## causation-first group
df_freesort_rdm_causation_first <- df_causal_first %>%
  mutate(item_pairs = str_c(itemA, itemB, sep = "-")) %>%
  group_by(item_pairs) %>%
  mutate(mean_euclidean = mean(freesort_distance, na.rm = TRUE)) %>%
  ungroup() %>%
  distinct(item_pairs, .keep_all = TRUE) %>%
  select(itemA, itemB, mean_euclidean) %>%
  pivot_wider(names_from = itemB, values_from = mean_euclidean) %>%
  column_to_rownames("itemA")

df_freesort_rdm_causation_first <- df_freesort_rdm_causation_first[desired_order, desired_order]  #reorder rdm

## freesort-first group
df_freesort_rdm_freesort_first <- df_freesort_first %>%
  mutate(item_pairs = str_c(itemA, itemB, sep = "-")) %>%
  group_by(item_pairs) %>%
  mutate(mean_euclidean = mean(freesort_distance, na.rm = TRUE)) %>%
  ungroup() %>%
  distinct(item_pairs, .keep_all = TRUE) %>%
  select(itemA, itemB, mean_euclidean) %>%
  pivot_wider(names_from = itemB, values_from = mean_euclidean) %>%
  column_to_rownames("itemA")

df_freesort_rdm_freesort_first <- df_freesort_rdm_freesort_first[desired_order, desired_order]  #reorder rdm


#flatten the upper tris, for correlation
flatten_rdm <- function(matrix) {
  matrix[upper.tri(matrix)]
}

# Extract upper triangles
causation_rdm_correlation <- cor(flatten_rdm(as.matrix(df_causal_rdm_causation_first)),
                                 flatten_rdm(as.matrix(df_causal_rdm_freesort_first)),
                                 method = "pearson")

freesort_rdm_correlation <- cor(flatten_rdm(as.matrix(df_freesort_rdm_causation_first)),
                                flatten_rdm(as.matrix(df_freesort_rdm_freesort_first)),
                                method = "pearson")

print(paste0("Corr(causal rdm Causal Task first, causal rdm Sorting Task first): " , causation_rdm_correlation))
## [1] "Corr(causal rdm Causal Task first, causal rdm Sorting Task first): 0.951633021558085"
print(paste0("Corr(freesort rdm Causal Task first, freesort rdm Sorting Task first):  " , freesort_rdm_correlation))
## [1] "Corr(freesort rdm Causal Task first, freesort rdm Sorting Task first):  0.864429030575128"
#compute correlation difference
correlation_difference <- causation_rdm_correlation - freesort_rdm_correlation
print(paste0("correlation difference: ", correlation_difference))
## [1] "correlation difference: 0.0872039909829571"

8.2 causal-freesort vs causal freesort correlation

#get upper tris
causation_first_rdm_correlation <- cor(flatten_rdm(as.matrix(df_causal_rdm_causation_first)),
                                 flatten_rdm(as.matrix(df_freesort_rdm_causation_first)),
                                 method = "pearson")

freesort_first_rdm_correlation <- cor(flatten_rdm(as.matrix(df_causal_rdm_freesort_first)),
                                flatten_rdm(as.matrix(df_freesort_rdm_freesort_first)),
                                method = "pearson")

print(causation_first_rdm_correlation)
## [1] -0.04585133
print(paste0("Corr(causal rdm Causal Task first, freesort rdm Causal Task first): " , causation_first_rdm_correlation))
## [1] "Corr(causal rdm Causal Task first, freesort rdm Causal Task first): -0.0458513340083499"
print(paste0("Corr(causal rdm Sorting Task first, freesort rdm Sorting Task first):  " , freesort_first_rdm_correlation))
## [1] "Corr(causal rdm Sorting Task first, freesort rdm Sorting Task first):  0.030704582634035"
#compute correlation difference
correlation_difference_orders <- freesort_first_rdm_correlation - causation_first_rdm_correlation 
correlation_difference_orders
## [1] 0.07655592
print(paste0("Correlation difference: " , correlation_difference_orders))
## [1] "Correlation difference: 0.0765559166423849"
fisher_z <- function(r) {
  return(0.5 * log((1 + r) / (1 - r)))
}

#convert correlations to Fisher's z
z1 <- fisher_z(causation_first_rdm_correlation)
z2 <- fisher_z(freesort_first_rdm_correlation)

N1 <- nrow(df_causal_rdm_causation_first)  
N2 <- nrow(df_causal_rdm_freesort_first)  

SE <- sqrt(1 / (N1 - 3) + 1 / (N2 - 3))

z_diff <- (z2 - z1) / SE #z score for the difference
p_value <- 2 * (1 - pnorm(abs(z_diff)))

print(paste("Z-score:", z_diff))
## [1] "Z-score: 0.187625387273826"
print(paste("P-value:", p_value))
## [1] "P-value: 0.851170323503799"